"""Write the labels of a set of seed entities and of their descendants.

Inputs (all read from the current directory):

* ``onelevel``  - one labelled entity per line; its IDs are the seed set.
* ``cleardate`` - one labelled entity per line; gives the ID -> label mapping.
* ``FieldOfStudyChildren.nt`` - one relation per line, containing two
  ``entity/<id>>`` references: the child first, then its parent.

Output: ``fos``, one label per line for every selected ID.
"""

SEED_PATH = 'onelevel'
HIERARCHY_PATH = 'FieldOfStudyChildren.nt'
LABEL_PATH = 'cleardate'
OUTPUT_PATH = 'fos'

# Maximum number of sweeps over the child/parent relations.
MAX_PASSES = 5

_ENTITY_MARKER = 'entity/'
_LABEL_SUFFIX = ' (computer science)'


def _take_entity_id(text):
    """Return ``(entity_id, rest)`` for the first ``entity/<id>>`` in *text*.

    *rest* is *text* starting at the ID itself, so a following search for the
    next marker or for a quote continues from there.

    Raises:
        ValueError: if the marker or the closing ``>`` is missing.
    """
    start = text.index(_ENTITY_MARKER) + len(_ENTITY_MARKER)
    end = text.index('>', start)
    return text[start:end], text[start:]


def _parse_labelled_entity(line):
    """Return ``(entity_id, label)`` parsed from one labelled-entity line.

    The label is the first double-quoted string after the ID, with the
    ``' (computer science)'`` disambiguation suffix removed.

    Raises:
        ValueError: if the line has no entity reference or no quoted label.
    """
    entity_id, rest = _take_entity_id(line)
    label_start = rest.index('"') + 1
    label_end = rest.index('"', label_start)
    label = rest[label_start:label_end].replace(_LABEL_SUFFIX, '')
    return entity_id, label


def _nonblank(lines):
    """Yield the lines that contain something other than whitespace."""
    return (line for line in lines if line.strip())


def read_seed_ids(lines):
    """Return the set of entity IDs found in the seed lines."""
    return {_parse_labelled_entity(line)[0] for line in _nonblank(lines)}


def read_labels(lines):
    """Return an ID -> label dict; a later line wins over an earlier one."""
    return dict(_parse_labelled_entity(line) for line in _nonblank(lines))


def read_edges(lines):
    """Return ``(child_id, parent_id)`` pairs, one per non-blank line.

    Raises:
        ValueError: if a line does not contain two entity references.
    """
    edges = []
    for line in _nonblank(lines):
        child_id, rest = _take_entity_id(line)
        parent_id, _ = _take_entity_id(rest)
        edges.append((child_id, parent_id))
    return edges


def expand_descendants(seed_ids, edges, max_passes=MAX_PASSES):
    """Return *seed_ids* plus every child reachable within *max_passes* sweeps.

    Each sweep walks *edges* in order and adds a child as soon as its parent
    is already selected, so IDs added earlier in a sweep can pull in further
    children later in the same sweep.  *edges* must be re-iterable (e.g. a
    list): the previous version re-read an exhausted file handle, so only the
    first sweep ever did any work.
    """
    selected = set(seed_ids)
    for _ in range(max_passes):
        size_before = len(selected)
        for child_id, parent_id in edges:
            if parent_id in selected:
                selected.add(child_id)
        if len(selected) == size_before:
            # Fixed point reached: further sweeps cannot add anything.
            break
    return selected


def main():
    """Run the whole pipeline: read the inputs, expand, write ``fos``."""
    with open(SEED_PATH) as seed_file:
        seed_ids = read_seed_ids(seed_file)

    # Build the ID -> label dictionary.
    with open(LABEL_PATH) as label_file:
        labels = read_labels(label_file)

    # Search the hierarchy for descendants of the seeds.  The relations are
    # parsed once and kept in memory so that every sweep sees all of them.
    with open(HIERARCHY_PATH) as hierarchy_file:
        edges = read_edges(hierarchy_file)
    selected = expand_descendants(seed_ids, edges)

    # Map every selected ID to its label.  This is done before the output
    # file is opened so a missing ID (KeyError) does not truncate an
    # existing result.  Sorting makes the output order reproducible.
    output_lines = [labels[entity_id] + '\n' for entity_id in sorted(selected)]
    with open(OUTPUT_PATH, 'w') as out_file:
        out_file.writelines(output_lines)


if __name__ == '__main__':
    main()

